NB: These analyses only include IATs that tested the association of two groups with “good” and “bad.”
pacman::p_load_gh("lukaswallrich/timesaveR", "bbc/bbplot")
pacman::p_load(bookdown, magrittr, here, dplyr, readr, purrr, stringr, googleCloudStorageR, naniar, ggplot2, patchwork, haven, broom, pwr, lavaan, gt)
set.seed(300688)
gcs_auth("~/gcs_key.json")
gcs_global_bucket("iat_data")
iat_type <- tribble(
~iat, ~type,
"Skin tone", "ethnic",
"Rel: Judaism vs. Islam", "religion",
"Rel: Christianity vs. Judaism", "religion",
"Rel: Christianity vs. Islam", "religion",
"Race: White vs Black", "ethnic",
"Disability", "fitness",
"Arab Muslims", "ethnic",
"Age", "fitness",
"Weight", "fitness",
"Sexuality", "sex & gender",
"Transgender", "sex & gender"
)
if (!file.exists(here::here("df_mini_2020.RDS"))) {
gcs_get_object("iat_df_mini_with_weights_2020.RDS", saveToDisk = here::here("df_mini_2020.RDS"))
}
iat_data_2020 <- readr::read_rds(here::here("df_mini_2020.RDS"))
iat_data_2020 <- iat_data_2020 %>%
mutate(month = lubridate::month(test_date), ym = paste0(year, "_", month)) %>%
left_join(iat_type) %>%
filter(iat != "Arab Muslims")
if (!file.exists(here::here("df_mini.RDS"))) {
gcs_get_object("iat_df_good_bad_mini_with_weights.RDS", saveToDisk = here::here("df_mini.RDS"))
}
iat_data <- readr::read_rds(here::here("df_mini.RDS"))
iat_data <- iat_data %>% filter(year >= 2006, year <= 2020) %>%
mutate(month = lubridate::month(test_date), ym = paste0(year, "_", month)) %>%
left_join(iat_type)
test_descriptions <- tibble::tribble(
~Test, ~Association, ~Stimuli, ~"Explicit scale", ~"Status coding", ~Note,
"Age", "Young people vs old people & good vs bad", "faces & words", '"I strongly prefer Old People to Young People." - "I strongly prefer Young People to Old People."',"*H*: <40 years, <br />*L*: >60 years", "Until 2016, the categories were labeled as young and old, rather than young people and old people.",
"Disability", "Abled persons vs disabled persons & Good vs bad", "pictograms", '""I strongly prefer Disabled people to Abled people." - "I strongly prefer Abled people to Disabled people."', "*H*: no disability, <br />*L*: has disability (yes/no self-report)", "",
"Race", "African American vs European American & good vs bad", "faces & words", '"I strongly prefer African Americans to European Americans." - "I strongly prefer European Americans to African Americans."',"*H*: Non-Hispanic White, <br />*L*: African-American", "Until 2016, the categories were labeled as Black people and White people",
"Sexuality", "Gay vs straight & good vs bad", "pictograms and drawings", '"I strongly prefer Gay People to Straight People." - "I strongly prefer Straight People to Gay People."', "*H*: Heterosexual, <br />*L*: Homosexual", "Participants are randomly divided between three target conditions in the IAT: gay men, lesbians or both. Here, we average across the conditions.",
"Skin tone", "Light vs dark skin & good vs bad", "paired drawings: light and dark-skinned & words", '"I strongly prefer Dark Skinned People to Light Skinned People." - "I strongly prefer Light Skinned People to Dark Skinned People."', "*H*: light-skinned, <br />*L*: dark-skinned (self-reported)", "Codebook lists two sets of stimuli - unclear how they differ.",
"Weight", "Fat vs thin people & good vs bad", "body silhouttes & words", '"I strongly prefer Fat People to Thin People." - "I strongly prefer Thin People to Fat People."', "*H*: 18.5<BMI<25, <br />*L*: BMI > 25", "Participants randomised between three conditions: face stimuli, or one of two sets of body stimuli. We present averages here.",
"Religion", "Any pair out of Christianity, Judaism and Islam & good vs bad", "single symbol (e.g., cross) & words", '"I strongly prefer [religion 1] to [religion 2]." - "I strongly prefer [religion 2] to President [religion 1]."', "*H*: Christianity, <br />*L*: Islam/Judaism (in Judaism vs. Islam test, Judaism is coded as high-status)", "This version only launched in 2017 - earlier versions are not comparable.",
"Transgender", "Transgender vs cisgender & good vs bad", "well-known individuals & words ", '"I strongly prefer transgender people to cisgender people." - "I strongly prefer cisgender people and transgendper people."', "*H*: cisgender, <br />*L*: transgender", "Launched in 2020. Trans-/cis-identity of stimuli is communicated and practiced before the test.")
test_descriptions %>%
gt(caption = "Overview over the Project Implicit tests considered here") %>%
fmt_markdown(5) %>%
tab_header(gt::md("**Measures in each test**")) %>%
tab_spanner("Implicit association test", 2:3) %>%
gt_apa_style()
| Measures in each test | |||||
|---|---|---|---|---|---|
| Test | Implicit association test | Explicit scale | Status coding | Note | |
| Association | Stimuli | ||||
Age |
Young people vs old people & good vs bad | faces & words | "I strongly prefer Old People to Young People." - "I strongly prefer Young People to Old People." | H: <40 years, |
Until 2016, the categories were labeled as young and old, rather than young people and old people. |
Disability |
Abled persons vs disabled persons & Good vs bad | pictograms | ""I strongly prefer Disabled people to Abled people." - "I strongly prefer Abled people to Disabled people." | H: no disability, |
|
Race |
African American vs European American & good vs bad | faces & words | "I strongly prefer African Americans to European Americans." - "I strongly prefer European Americans to African Americans." | H: Non-Hispanic White, |
Until 2016, the categories were labeled as Black people and White people |
Sexuality |
Gay vs straight & good vs bad | pictograms and drawings | "I strongly prefer Gay People to Straight People." - "I strongly prefer Straight People to Gay People." | H: Heterosexual, |
Participants are randomly divided between three target conditions in the IAT: gay men, lesbians or both. Here, we average across the conditions. |
Skin tone |
Light vs dark skin & good vs bad | paired drawings: light and dark-skinned & words | "I strongly prefer Dark Skinned People to Light Skinned People." - "I strongly prefer Light Skinned People to Dark Skinned People." | H: light-skinned, |
Codebook lists two sets of stimuli - unclear how they differ. |
Weight |
Fat vs thin people & good vs bad | body silhouttes & words | "I strongly prefer Fat People to Thin People." - "I strongly prefer Thin People to Fat People." | H: 18.5<BMI<25, |
Participants randomised between three conditions: face stimuli, or one of two sets of body stimuli. We present averages here. |
Religion |
Any pair out of Christianity, Judaism and Islam & good vs bad | single symbol (e.g., cross) & words | "I strongly prefer [religion 1] to [religion 2]." - "I strongly prefer [religion 2] to President [religion 1]." | H: Christianity, |
This version only launched in 2017 - earlier versions are not comparable. |
Transgender |
Transgender vs cisgender & good vs bad | well-known individuals & words | "I strongly prefer transgender people to cisgender people." - "I strongly prefer cisgender people and transgendper people." | H: cisgender, |
Launched in 2020. Trans-/cis-identity of stimuli is communicated and practiced before the test. |
Project Implicit launched in 1998, and built up a suite of implicit association tests in the coming years. Participants navigate to the Project Implicit webpage, select which test to take and provide informed consent. They then first complete an implicit association test, then explicit prejudice measures and finally provide demographic background information. Within each section, the presentation order is randomised. The resulting data has been made available on the OSF.
Since 2005, they have offered a range of comparable tests and employed a consistent scoring mechanism, so that we consider that period up to the end of 2020 here, encompassing a total of 17 years (-5843 days) of data collection. We considered all tests that concerned attitudes towards anonymous individuals (thus excluding US presidents), and that remained fundamentally unchanged over the period considered here (thus excluding those concerned with religion). These tests are described in Table 1.1.
Each participant navigated to the Project Implicit webpage and selected one of the tests to complete. After consent was obtained, they completed an implicit association test as well as measures of explicit preferences and demographic characteristics.
Implicit preferences were measured with the implicit association test that uses differences in reaction times to estimate the strength of associations (Greenwald, McGhee, and Schwartz 1998). Participants use two keys to classify stimuli into four categories (e.g., “African American,” “European American,” “bad,” “good”) over multiple blocks. After initial practice, categories are paired so that participants have to sort, for instance, African American faces into a category “African American or Good.” If this is done faster than sorting the same stimuli during the blocks featuring the “African American or Bad” category, this would constitute evidence for an association between African American and Good, and thus be interpreted as evidence for implicit bias in favor of African Americans. The presentation order of blocks as well as the assignment of categories to the left or right side of the screen was randomized. Based on the response times, an implicit bias score was calculated based on the D algorithm proposed by Greenwald et al. (2003). This results in scores between -2 and +2, with 0 indicating the absence of implicit bias in either direction.
Explicit preferences were mostly measured on seven-point scales centered around zero (no preference/association), though earlier tests included five-point scales with similar anchors. These were rescaled based on the method proposed by Dawes (2002), so that all reported data is based on the seven-point version.
fmt_large <- \(x, digits = 0) prettyNum(round(x, digits), big.mark = ",")
#Logged to GCS during filtering
exclusions <- tibble::tribble(
~iat, ~excluded, ~share,
"skin_tone", 91589, 4.1,
"disability", 72304, 7.5,
"sexuality", 367271, 11.8,
"age", 113665, 4.9,
"weight", 163050, 6.5,
"transgender", 15439, 5.9,
"rel_C_v_J", 10371, 4.9,
"rel_C_v_i", 11597, 5.4,
"rel_J_v_i", 14247, 7.2,
"race", 276824, 3.9
) %>% mutate(share = share/100, N = excluded/share)
m_sd <- function(x, na.rm = TRUE) {
paste0(sprintf("%0.2f", mean(x, na.rm = na.rm)), " (", sprintf("%0.2f", sd(x, na.rm = na.rm)), ")")
}
timing_counts <- count(iat_data, year, month, iat)
descr <- iat_data %>%
group_by(iat) %>%
summarise(
N = n(),
Age = m_sd(age),
"% female" = fmt_pct(mean(gender == "female", na.rm = T), 0),
implicit_M = m_sd(iat_score),
implicit_d = mean(iat_score) / sd(iat_score),
explicit_M = m_sd(att),
explicit_d = mean(att, na.rm = TRUE) / sd(iat_score),
r = cor(iat_score, att, use = "pair")
)
descr %<>% bind_rows(iat_data %>%
summarise(
N = n(),
Age = m_sd(age),
"% female" = fmt_pct(mean(gender == "female", na.rm = T), 0),
implicit_M = m_sd(iat_score),
implicit_d = mean(iat_score) / sd(iat_score),
explicit_M = m_sd(att),
explicit_d = mean(att, na.rm = TRUE) / sd(iat_score),
r = cor(iat_score, att, use = "pair")
) %>%
mutate(iat = "Total"))
daily <- iat_data %>%
group_by(iat, test_date) %>%
summarise(N = n()) %>%
summarise(daily = paste(fmt_large(quantile(N, .2)), "-", fmt_large(quantile(N, .8))))
daily %<>% bind_rows(iat_data %>%
group_by(test_date) %>%
summarise(N = n()) %>%
summarise(upper = quantile(N, .8), lower = quantile(N, .2), daily = paste(fmt_large(lower), "-", fmt_large(upper))) %>%
mutate(iat = "Total"))
descr %<>% left_join(daily) %>%
select(-lower, -upper) %>%
select(iat, N, "Daily N \n (*80% range*)" = daily, everything())
descr_no_total <- filter(descr, iat != "Total")
my_scale <- scales::col_numeric(c("#FEF0D9", "#990000"),
domain = range(c(descr$implicit_d, descr$explicit_d)), alpha = 0.75
)
#Get value of a variable in row that has minimum value on another variable
min_match <- function(data, label, value) {
data %>% filter({{value}} == min({{value}})) %>%
pull({{label}}) %>% .[1]
}
#Get value of a variable in row that has minimum value on another variable
max_match <- function(data, label, value) {
data %>% filter({{value}} == max({{value}})) %>%
pull({{label}}) %>% .[1]
}
low_status_shares <- count(iat_data, iat, status) %>%
group_by(iat) %>% mutate(share = n/sum(n)) %>% filter(status == "low")
descr2020 <- iat_data_2020 %>%
group_by(iat) %>%
summarise(
N = n(),
Age = m_sd(age),
"% female" = fmt_pct(mean(gender == "female", na.rm = T), 0),
implicit_M = m_sd(iat_score),
implicit_d = mean(iat_score) / sd(iat_score),
explicit_M = m_sd(att),
explicit_d = mean(att, na.rm = TRUE) / sd(iat_score),
r = cor(iat_score, att, use = "pair")
)
descr2020 %<>% bind_rows(iat_data_2020 %>%
summarise(
N = n(),
Age = m_sd(age),
"% female" = fmt_pct(mean(gender == "female", na.rm = T), 0),
implicit_M = m_sd(iat_score),
implicit_d = mean(iat_score) / sd(iat_score),
explicit_M = m_sd(att),
explicit_d = mean(att, na.rm = TRUE) / sd(iat_score),
r = cor(iat_score, att, use = "pair")
) %>%
mutate(iat = "Total"))
descr2020_only <- descr2020 %>% filter(!iat %in% descr$iat)
descr2020_no_total <- filter(descr2020, iat != "Total")
my_scale_2020 <- scales::col_numeric(c("#FEF0D9", "#990000"),
domain = range(c(descr2020$implicit_d, descr2020$explicit_d)), alpha = 0.75
)
choice_share <- function(data, choice, drop_na = TRUE, fmt_pct = TRUE) {
if (drop_na) {
data <- na.omit(data)
}
share <- mean(data %in% choice)
if (fmt_pct) {
share <- timesaveR::fmt_pct(share)
}
share
}
descr %>%
rename(Test = iat) %>%
gt::gt(caption = "Sample characteristics and mean responses by test (2006-2020)") %>%
gt::fmt_number(2, decimals = 0) %>%
gt::fmt_number(c(7,9,10), decimals = 2) %>%
gt::tab_source_note(gt::md("*Note.* Positive implicit and explicit bias scores indicate preference for higher-status group. Standard deviations shown in parentheses. Darker colors indicate stronger bias.")) %>%
gt_apa_style() %>%
gt::data_color(c(7, 9), colors = my_scale) %>%
tab_spanner("Implicit bias", 6:7) %>%
tab_spanner("Explicit bias", 8:9) %>%
cols_label(
implicit_M = "M (SD)", implicit_d = md("*d*"),
explicit_M = "M (SD)", explicit_d = md("*d*"),
r = md("*r*")
)
| Test | N | Daily N (80% range) | Age | % female | Implicit bias | Explicit bias | r | ||
|---|---|---|---|---|---|---|---|---|---|
| M (SD) | d | M (SD) | d | ||||||
Age |
1,225,363 | 75 - 320 | 28.06 (12.71) | 70% | 0.44 (0.38) | 1.15 | 0.41 (1.16) | 1.08 | 0.13 |
Disability |
515,328 | 29 - 135 | 29.33 (12.49) | 75% | 0.51 (0.44) | 1.16 | 0.39 (0.89) | 0.89 | 0.15 |
Race: White vs Black |
4,389,899 | 301 - 1,056 | 29.17 (12.83) | 62% | 0.30 (0.44) | 0.68 | 0.18 (1.04) | 0.42 | 0.31 |
Sexuality |
1,605,606 | 120 - 421 | 26.76 (11.41) | 66% | 0.22 (0.48) | 0.47 | 0.35 (1.22) | 0.72 | 0.45 |
Skin tone |
1,303,417 | 95 - 324 | 28.89 (12.08) | 70% | 0.30 (0.42) | 0.72 | 0.19 (0.92) | 0.45 | 0.23 |
Weight |
1,327,216 | 97 - 373 | 27.67 (11.87) | 73% | 0.46 (0.40) | 1.16 | 0.84 (1.06) | 2.10 | 0.21 |
Total |
10,366,829 | 724 - 2,513 | 28.46 (12.41) | 67% | 0.33 (0.44) | 0.76 | 0.33 (1.09) | 0.76 | 0.30 |
| Note. Positive implicit and explicit bias scores indicate preference for higher-status group. Standard deviations shown in parentheses. Darker colors indicate stronger bias. | |||||||||
descr2020 %>%
rename(Test = iat) %>%
gt::gt(caption = "Sample characteristics and mean responses by test (2020 only)") %>%
gt::fmt_number(2, decimals = 0) %>%
gt::fmt_number(c(6,8,9), decimals = 2) %>%
gt::tab_source_note(gt::md("*Note.* Positive implicit and explicit bias scores indicate preference for higher-status group. Standard deviations shown in parentheses. Darker colors indicate stronger bias.")) %>%
gt_apa_style() %>%
gt::data_color(c(6, 8), colors = my_scale_2020) %>%
tab_spanner("Implicit bias", 6:7) %>%
tab_spanner("Explicit bias", 8:9) %>%
cols_label(
implicit_M = "M (SD)", implicit_d = md("*d*"),
explicit_M = "M (SD)", explicit_d = md("*d*"),
r = md("*r*")
)
| Test | N | Age | % female | M (SD) | Implicit bias | Explicit bias | ||
|---|---|---|---|---|---|---|---|---|
| d | M (SD) | d | r | |||||
Age |
199,911 | 30.56 (14.03) | 70% | 0.43 (0.37) | 1.16 | 0.35 (1.14) | 0.94 | 0.13 |
Disability |
99,671 | 31.36 (13.34) | 76% | 0.50 (0.45) | 1.11 | 0.30 (0.83) | 0.67 | 0.15 |
Race: White vs Black |
733,731 | 34.03 (14.39) | 66% | 0.25 (0.44) | 0.57 | 0.04 (0.93) | 0.09 | 0.27 |
Rel: Christianity vs. Islam |
38,240 | 32.33 (14.40) | 64% | 0.39 (0.42) | 0.92 | 1.12 (1.59) | 2.67 | 0.42 |
Rel: Christianity vs. Judaism |
38,535 | 32.42 (14.37) | 64% | 0.31 (0.43) | 0.72 | 0.79 (1.71) | 1.83 | 0.45 |
Rel: Judaism vs. Islam |
35,560 | 33.03 (14.58) | 63% | 0.19 (0.38) | 0.52 | 0.63 (1.22) | 1.67 | 0.34 |
Sexuality |
214,486 | 29.70 (13.01) | 68% | 0.11 (0.48) | 0.23 | 0.05 (1.16) | 0.11 | 0.44 |
Skin tone |
238,150 | 32.53 (13.65) | 71% | 0.27 (0.42) | 0.66 | 0.08 (0.84) | 0.18 | 0.22 |
Transgender |
72,339 | 32.63 (13.42) | 67% | 0.09 (0.44) | 0.21 | 0.41 (1.08) | 0.93 | 0.34 |
Weight |
202,448 | 30.74 (13.38) | 74% | 0.47 (0.39) | 1.19 | 0.65 (1.01) | 1.64 | 0.21 |
Total |
1,873,071 | 32.35 (14.01) | 69% | 0.29 (0.44) | 0.65 | 0.22 (1.06) | 0.50 | 0.29 |
| Note. Positive implicit and explicit bias scores indicate preference for higher-status group. Standard deviations shown in parentheses. Darker colors indicate stronger bias. | ||||||||
Participants were excluded if they did not complete the IAT, or if their response quality on the IAT was low (based on Nosek et al. 2007). Specifically, this concerned participants who reponded in less than 300 ms on more than 10% of the trials, who had an error rate greater than 30% across the critical trials, or an error rate greater than 40% on a single trial. Overall, this affected 5.9% of responses. In addition, we only considered respondents who were residents of the United States of America, given that they made up the vast majority of the sample, and that the international reach of the different tests varies substantially.
Among the six tests that ran over the entire period from 2006 to 2020, the resulting number of respondents per test ranged from 515,328 respondents to the Disability-IAT to 4,389,899 to the Race: White vs Black-IAT, yielding a total sample of 10,366,829 responses. On 80% of days, there were between 724 and 2,513 responses. Sample sizes and descriptives per test can be found in Table 1.2.
Among the tests for religious bias and attitudes towards transgender individuals, which we only considered in 2020, the number of responses ranged from 35,560 respondents to the Rel: Judaism vs. Islam-IAT to 72,339 to the Transgender-IAT. Sample sizes and descriptives per test for 2020 only can be found in Table 1.3
In the longitudinal dataset, participants mean age was 28.5 (SD: 12.4). 66.6% were female. Regarding political orientation, 13.3% identified as strongly or moderately conservative, while 37.7% identified as strongly or moderately liberal. 67.1% identified as non-Hispanic White, while 11.8% identified as Hispanic and 9.7% as African American.
iat_data %>%
filter(year <= 2020) %>%
group_by(year, month, iat) %>%
summarise(N = n(), .groups = "drop") %>%
mutate(date = lubridate::ymd(paste(year, month, 15))) %>%
ggplot(aes(x = date, y = N, fill = reorder(iat, N, sum))) +
geom_bar(stat = "identity", alpha = 0.6, color = NA, size = 0, position = position_stack()) +
scale_fill_brewer(type = "qual", palette = 2) +
geom_vline(xintercept = lubridate::ymd("20050101") + lubridate::years(1:15), linetype = "dotted", size = .5, color = "darkgrey") + theme_minimal() +
labs(x = "", y = "", title = "", fill = "Test") +
scale_y_continuous(labels = scales::label_number(scale = 1e-3))
Figure 1.1: Responses per month (’000)
Most tests gained in popularity over time, as shown in Figure 1.1. The median number of responses per month per test was 6,558, with substantial seasonal variation (fewer tests in the summer) and differences between tests. However, 99.1% of month-test pairs consist of more than 500 observations.
While these datasets are very large, it needs to be noted that they are not representative of any specific population. Nevertheless, they are more diverse than the majority of datasets used in social psychology, and offer unique possibilities for comparisons across tests and over time. To the extent that selection factors are similar across the tests and demographic categories, analyses within and across the datasets might well give an indication regarding patterns of bias and change in the US population.
In line with Nosek et al. (2007), due to the sample size, we do not report significance tests. It is often implausible that there would be no relationship at all between two variables in the social sciences, and even the smallest sample of 515,328 respondents would give us 99% power to detect a trivial effect size of d = 0.01. Therefore, we focus our analysis and reporting on effect sizes.
In order to make the explicit and implicit measures more comparable, and to make the results more interpretable, we followed earlier research Westgate, Riskind, and Nosek (2015) and standardised these measures, retaining the 0 as a neutral point. Given the relative nature of the bias measures, these values can be interpreted as Cohen’s d.
To mitigate against the risk that differences between tests and across time are driven by different sample compositions, we calculated post-stratification weights for each test-year combination to match the overall sample in terms of age, gender, ethnicity, religion, religiosity and political orientation. In the tables, any sample characteristics (e.g., N) are unweighted, while any outcome measures (e.g. M, d) are weighted in this manner. Note that this does not address a key imbalance between the tests: the different share of high- and low-status respondents. For instance,
As indicated in the two summary tables, most tests showed evidence of substantial implicit and explicit bias.
pacman::p_load(ggridges, patchwork, ggnewscale)
#From https://stackoverflow.com/a/61894717
weighted.median <- function(x, w) {
w <- w[order(x)]
x <- x[order(x)]
prob <- cumsum(w)/sum(w)
ps <- which(abs(prob - .5) == min(abs(prob - .5)))
return(x[ps])
}
plot_distribution <- function(data, status_filter = ".*", title, outcome = iat_score) {
data <- data %>%
mutate(status = forcats::fct_explicit_na(status)) %>%
filter(stringr::str_detect(status, status_filter))
share_biased <- data %>%
group_by(iat) %>%
summarise(
pro_high = sum(({{outcome}} >= .15) * wt) / sum(wt),
pro_low = sum(({{outcome}} <= -.15) * wt) / sum(wt)
)
medians <- data %>%
group_by(iat) %>%
summarise(median = weighted.median({{outcome}}, wt),
data = list({{outcome}}))
median_lookup <- function(x, ...) {
browser()
medians$median[identical(medians$data, list(x))]
}
data %>% ggplot(aes(x = {{outcome}}, y = iat, fill = stat(x))) +
geom_density_ridges_gradient(aes(height = ..density.., weight = wt),
scale = 0.95,
stat = "density"#,
#quantile_lines = TRUE,
#quantile_fun = median_lookup
) +
scale_fill_viridis_c(name = "IAT scores", option = "viridis", direction = -1, limits = c(-2, 2)) +
labs(title = title, y = "", x = "") +
annotate("rect", xmin = -.15, xmax = .15, ymin = 0, ymax = length(unique(data$iat)) + 1, fill = "white", alpha = .4) +
geom_text(data = share_biased, aes(y = iat, x = 1.7, label = fmt_pct(pro_high, 0), fill = NULL), nudge_y = .5) +
geom_text(data = share_biased, aes(y = iat, x = -1.7, label = fmt_pct(pro_low, 0), fill = NULL), nudge_y = .5) +
theme(legend.position = "none") +
scale_x_continuous(limits = c(-2, 2))
}
iat_data_2020 %<>% mutate(iat = forcats::fct_reorder(iat, iat_score, .fun = mean))
p_all <- plot_distribution(iat_data_2020, title = "All respondents")
p_high <- plot_distribution(iat_data_2020, "high", title = "High-status respondents")
p_low <- plot_distribution(iat_data_2020, "low", title = "Low-status respondents")
my_theme <- jtools::theme_apa() + theme(legend.position = "none",
panel.grid.major.y = element_line(size = 0.5,
linetype = "dotted"))
(p_all / p_high / p_low) * my_theme + plot_annotation(
title = "Distribution of 2020 IAT scores",
subtitle = "... and share of biased respondents (|D| > .15)",
caption = "NB: Positive scores indicate preference for higher-status group; \n
Status is coded separately for each IAT"
)
Figure 2.1: Distribution of 2020 IAT scores
pacman::p_load(ggalt, tidyr)
plot_dumbbell <- function(data, groups, high, low, outcome = iat_score, outcome_str = "IAT score") {
outcome <- rlang::enquo(outcome)
dumbbell_df <- data %>%
drop_na(!!outcome) %>%
group_by(iat, {{ groups }}) %>%
summarise(outcome = weighted.mean(!!outcome, wt), .groups = "drop") %>%
tidyr::spread({{ groups }}, outcome) %>%
mutate(gap = {{ high }} - {{ low }}) %>%
arrange(desc(gap))
high <- rlang::enquo(high)
low <- rlang::enquo(low)
#Swap if high and low are switched
if ((summarise(dumbbell_df, max(!!high)) %>% pull()) < (summarise(dumbbell_df, max(!!low)) %>% pull() )) {
x <- high
high <- low
low <- x
}
ggplot(dumbbell_df, aes(x = !!low, xend = !!high, y = reorder(iat, !!high), group = iat)) +
geom_dumbbell(
colour = "#dddddd",
size = 3,
colour_x = "#FAAB18",
colour_xend = "#1380A1"
) +
labs(
y = "",
x = paste("Mean", outcome_str, "by", rlang::as_name(rlang::enquo(groups)))
) +
geom_text(data=data.frame(),
aes(x=dumbbell_df %>% summarise(max(!!high)) %>% pull(),
y= dumbbell_df %>% filter(!!high == max(!!high)) %>% pull(iat)%>% .[1],
label=rlang::as_name(high),
group = NULL, xend = NULL),
color="#1380A1", hjust=0, size=5, nudge_x=(dumbbell_df %>% summarise(max(!!high)) %>% pull())/30) +
geom_text(data=data.frame(),
aes(x=dumbbell_df %>%
filter(iat == (dumbbell_df %>% filter(!!high == max(!!high)) %>% pull(iat) %>% .[1])) %>%
pull(!!low),
y= dumbbell_df %>% filter(!!high == max(!!high)) %>% pull(iat)%>% .[1],
label=rlang::as_name(low),
group = NULL, xend = NULL),
color="#FAAB18", hjust=1, size=5, nudge_x=-(dumbbell_df %>% summarise(max(!!high)) %>% pull())/30) +
scale_x_continuous(expand=c(0,.015*stringr::str_length(rlang::as_name(high)))) +
theme_minimal()
}
#TK - write function to automatically center x-axis around 0
center_x_0 <- function(plot) {
ggplot_build(ggobj)$panel$ranges[[1]]$x.range
}
iat_data_2020 %<>% mutate(iat = forcats::fct_reorder(iat, att, .fun = mean, na.rm = TRUE))
plot_distribution_bins <- function(data, status_filter = ".*", title, outcome = att) {
data <- data %>%
mutate(status = forcats::fct_explicit_na(status)) %>%
filter(stringr::str_detect(status, status_filter)) %>%
filter(!is.na({{outcome}}))
share_biased <- data %>%
group_by(iat) %>%
summarise(
pro_high = sum(({{outcome}} >= 1) * wt) / sum(wt),
pro_low = sum(({{outcome}} <= -1) * wt) / sum(wt)
)
medians <- data %>%
group_by(iat) %>%
summarise(median = weighted.median({{outcome}}, wt))
data %>% ggplot(aes(x = {{outcome}}, y = iat, fill = stat(x))) +
geom_density_ridges_gradient(aes(weight = wt), stat = "binline", bins = 7, scale = 0.95, pad = FALSE
) +
scale_fill_viridis_c(name = "Responses", option = "viridis", direction = -1) +
labs(title = title, y = "", x = "") +
geom_text(data = share_biased, aes(y = iat, x = 3.5, label = fmt_pct(pro_high, 0), fill = NULL), nudge_y = .1) +
geom_text(data = share_biased, aes(y = iat, x = -3.5, label = fmt_pct(pro_low, 0), fill = NULL), nudge_y = .1) +
theme(legend.position = "none") +
scale_x_continuous(limits = c(-3.6, 3.6))
}
p_all <- plot_distribution_bins(iat_data_2020, title = "All respondents")
## Warning: Ignoring unknown aesthetics: weight
p_high <- plot_distribution_bins(iat_data_2020, "high", title = "High-status respondents")
## Warning: Ignoring unknown aesthetics: weight
p_low <- plot_distribution_bins(iat_data_2020, "low", title = "Low-status respondents")
## Warning: Ignoring unknown aesthetics: weight
my_theme <- jtools::theme_apa() + theme(legend.position = "none",
panel.grid.major.y = element_line(size = 0.5,
linetype = "dotted"))
(p_all / p_high / p_low) * my_theme + plot_annotation(
title = "Distribution of 2020 explicit attitudes",
subtitle = "... and share of respondents expressing explicit preferences",
caption = "NB: Positive scores indicate preference for higher-status group; \n
Status is coded separately for each IAT"
)
Figure 2.2: Distribution of 2020 explicit preferences
plot_dumbbell(iat_data_2020, status, high, low) +
labs(title = "Bias levels differ drastically between target groups",
subtitle = "... e.g., fitness biases are internalised, religion attitudes are symmetric")
Figure 2.3: Association between respondents’ status and their IAT scores
plot_dumbbell(iat_data_2020, status, high, low, att, "explicit preference") +
labs(title = "Low status groups tend to express greater explicit in-group preferences",
subtitle = "... except for fitness related categories") +
scale_x_continuous(limits = c(-2.5, 2.5)) + geom_vline(xintercept = 0, col = "orange", linetype = "dashed")
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
Figure 2.4: Association between respondents’ status and their explicit preferences
Figure 2.1 shows the distribution of implicit responses for each test. Overall, a majority of respondents shows a substatial implicit preference for the higher-status group on each test. However, an interesting pattern emerges when high- and low-status respondents are considered. The distribution of their responses differs drastically on some tests, yet not others. Figure 2.3 compares the mean responses and highlights diverging patterns: fitness-related biases are nearly uniformly shared, while implicit preferences with regard to sexuality, religion and transgender-identity reflect fairly symmetric ingroup biases. With regard to racism and colorism, the pattern is more mixed. It appears that an implicit preference for lighter skin is shared across groups, though attenuated among African-Americans. When it comes to ‘race,’ African Americans’ mean implicit preference is close to neutral, with at most weak evidence for ingroup bias. Figure 2.4 makes a similar comparison with regard to explicit preferences. Here, the most noteworthy pattern is that on all biases but those related to fitness, low-status respondents report a greater ingroup preference than high-status respondents do.
Figure 2.2 shows the distribution of explicit responses. Partly due to the very blatant measure and the coarseness of the measure (a single 7-point item), a majority of respondents expresses a neutral attitude on most tests. Nevertheless, it is worth noting that substantial minorities express an explicit preference for the higher-status group. This is particularly striking for ‘fitness-related’ biases, and rather rare for race-related matters. Interestingly, sexual orientation and transgender status fall in between, with approximately 1/3 of high-status respondents willing to express an explicit preference for their in-group.
plot_dumbbell(iat_data_2020, gender, male, female) +
labs(title = "Men are substantially more biased than women",
subtitle = "... particularly regarding disability and sexuality")
Figure 2.5: Association between respondents’ gender and their IAT scores
iat_data_2020 %>% mutate(`political orientation` = case_when(
politicalid < 3 ~ "conservative",
politicalid > 5 ~ "liberal",
TRUE ~ NA_character_)) %>%
plot_dumbbell(`political orientation`, liberal, conservative, att, "explicit preference") +
labs(title = "Liberals are generally less biased than conservatives",
subtitle = "... with much larger gaps on 'politicized' biases")
Figure 2.6: Association between respondents’ political orientation and their IAT scores
iat_data_2020 %>% mutate(`political orientation` = case_when(
politicalid < 3 ~ "conservative",
politicalid > 5 ~ "liberal",
TRUE ~ NA_character_)) %>%
plot_dumbbell(`political orientation`, liberal, conservative) +
labs(title = "Liberals are generally less biased than conservatives",
subtitle = "... with much larger gaps on 'politicized' biases")
Figure 2.7: Association between respondents’ political orientation and their explicit preferences
There is an extensive literature on predictors of specific biases. However, this data allows to test general associations between demographic characteristics.
For instance, across tests men typically show greater implicit bias than women do (Figure 2.5). The link between political orientation and bias was more complex - while Liberals tended to show less of an implicit preference for high status groups than Conservatives did, the gap was substantially larger for biases that have been politicized (Figure 2.6 and Figure 2.7).
monthly_means <- iat_data %>%
group_by(iat, year, month, ym) %>%
summarise(iat_score = weighted.mean(iat_score, wt), N = n()) %>%
#Need to filter due to erratic weight scores in 2004, based on 1-16 responses per month
filter(N > 100) %>%
mutate(date = lubridate::ymd(paste(year, month, 15)))
## `summarise()` has grouped output by 'iat', 'year', 'month'. You can override
## using the `.groups` argument.
daily_means <- iat_data %>%
group_by(iat, test_date) %>%
summarise(iat_score = weighted.mean(iat_score, wt), N = n())
## `summarise()` has grouped output by 'iat'. You can override using the `.groups`
## argument.
monthly_means %>%
ggplot(aes(x = date, y = iat_score, color = reorder(iat, -iat_score), group = iat)) +
geom_point(alpha = .2) +
geom_smooth(data = daily_means , aes(weight = N, x = test_date), se = FALSE) +
scale_color_brewer(type = "qual", palette = 2) +
labs(title = "Trends in implicit bias scores", subtitle = "Monthly means and GAM trendlines",
caption = "NB: Trendline based on daily means weighted by sample size", x = "", y = "", col = "Test") + jtools::theme_apa()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Figure 2.8: Trends in implicit bias (2006-2020)
monthly_means <- iat_data %>%
group_by(iat, year, month, ym) %>%
drop_na(att) %>%
summarise(exp_att = weighted.mean(att, wt), N = n()) %>%
#Need to filter due to erratic weight scores in 2014(?), based on 1-16 responses per month
filter(N > 100) %>%
mutate(date = lubridate::ymd(paste(year, month, 15)))
## `summarise()` has grouped output by 'iat', 'year', 'month'. You can override
## using the `.groups` argument.
daily_means <- iat_data %>%
drop_na(att) %>%
group_by(iat, test_date) %>%
summarise(exp_att = weighted.mean(att, wt), N = n())
## `summarise()` has grouped output by 'iat'. You can override using the `.groups`
## argument.
monthly_means %>%
ggplot(aes(x = date, y = exp_att, color = reorder(iat, -exp_att), group = iat)) +
geom_point(alpha = .2) +
geom_smooth(data = daily_means , aes(weight = N, x = test_date), se = FALSE, method = "gam") +
scale_color_brewer(type = "qual", palette = 2) +
labs(title = "Trends in explicit bias scores", subtitle = "Monthly means and GAM trendlines",
caption = "NB: Trendline based on daily means weighted by sample size", x = "", y = "", col = "Test") + jtools::theme_apa()
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
Figure 2.9: Trends in explicit preferences (2006-2020)
Initially, we plotted the overall trajectories of implicit and explicit biases since 2006. When it comes to implicit bias, the trajectories diverged substantially (see Figure 2.8). Weight bias increased over time, while bias related to sexual orientation fell sharply. Biases linked to skin color and race trended downwards more slowly, while disability and age-related bias was flat.
Interestingly, the trends for explicit bias were more uniform (see Figure 2.9). Over time, respondents became increasingly less likely to express explicit preferences for all high-status groups. Once again, however, the decline was most pronounced when it came to preferences for straight over gay people.
iat_data_all <- iat_data
iat_data %<>% filter(status == "high")
monthly_means <- iat_data %>%
group_by(iat, year, month, ym) %>%
summarise(iat_score = weighted.mean(iat_score, wt), N = n()) %>%
#Need to filter due to erratic weight scores in 2004, based on 1-16 responses per month
filter(N > 100) %>%
mutate(date = lubridate::ymd(paste(year, month, 15)))
## `summarise()` has grouped output by 'iat', 'year', 'month'. You can override
## using the `.groups` argument.
daily_means <- iat_data %>%
group_by(iat, test_date) %>%
summarise(iat_score = weighted.mean(iat_score, wt), N = n())
## `summarise()` has grouped output by 'iat'. You can override using the `.groups`
## argument.
monthly_means %>%
ggplot(aes(x = date, y = iat_score, color = reorder(iat, -iat_score), group = iat)) +
geom_point(alpha = .2) +
geom_smooth(data = daily_means , aes(weight = N, x = test_date), se = FALSE) +
scale_color_brewer(type = "qual", palette = 2) +
labs(title = "Trends in implicit bias scores (high status)", subtitle = "Monthly means and GAM trendlines",
caption = "NB: Trendline based on daily means weighted by sample size", x = "", y = "", col = "Test") + jtools::theme_apa()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Figure 2.10: Trends in implicit bias (2006-2020, high-status respondents)
monthly_means <- iat_data %>%
group_by(iat, year, month, ym) %>%
drop_na(att) %>%
summarise(exp_att = weighted.mean(att, wt), N = n()) %>%
#Need to filter due to erratic weight scores in 2004, based on 1-16 responses per month
filter(N > 100) %>%
mutate(date = lubridate::ymd(paste(year, month, 15)))
## `summarise()` has grouped output by 'iat', 'year', 'month'. You can override
## using the `.groups` argument.
daily_means <- iat_data %>%
drop_na(att) %>%
group_by(iat, test_date) %>%
summarise(exp_att = weighted.mean(att, wt), N = n())
## `summarise()` has grouped output by 'iat'. You can override using the `.groups`
## argument.
monthly_means %>%
ggplot(aes(x = date, y = exp_att, color = reorder(iat, -exp_att), group = iat)) +
geom_point(alpha = .2) +
geom_smooth(data = daily_means , aes(weight = N, x = test_date), se = FALSE, method = "gam") +
scale_color_brewer(type = "qual", palette = 2) +
labs(title = "Trends in explicit bias scores (high status)", subtitle = "Monthly means and GAM trendlines",
caption = "NB: Trendline based on daily means weighted by sample size", x = "", y = "", col = "Test") + jtools::theme_apa()
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
Figure 2.11: Trends in explicit preferences (2006-2020, high-status respondents)
iat_data <- iat_data_all
rm(iat_data_all)
For high-status respondents only, the patterns can be found in Figure 2.10 and Figure 2.11
The trend lines estimated with a general additive model (GAM) do not assume any particular shape. They suggest that changes in levels of bias were likely not linear, but generally monotonous, with few if any sustained changes in the overall direction. Polynomial regression models with quadratic and higher-level terms could fit regression lines that match the patterns identified in the GAMs. However, as it appears highly unlikely that these patterns represent (long-term) seasonality that might recur in the future, they would seem to be of limited use when it comes to understanding long-term trends and to cautious extrapolation. Therefore, we fitted simple linear regression models to understand the changes that came about over time and to (theoretically) extrapolate them into the future.
current_bias <- iat_data %>% filter(year == 2020) %>%
group_by(iat) %>% summarise(bias = mean(iat_score))
date_lms <- iat_data %>% group_by(iat) %>%
do(fit_date = lm(iat_score ~ test_date, weight = wt, data = .))
lm_res <- date_lms %>%
left_join(current_bias) %>%
mutate(r2_pct = glance(fit_date)$r.squared*100, p.value = glance(fit_date)$p.value,
coef = tidy(fit_date)$estimate[2],
annual_change = coef*365,
total_change = coef * as.numeric(max(iat_data$test_date) - min(iat_data$test_date)),
years_to_neutral = bias/annual_change) %>%
select(-fit_date)
## Joining, by = "iat"
gt::gt(lm_res, caption = "Linear trends in implicit bias") %>% gt_apa_style()
| iat | bias | r2_pct | p.value | coef | annual_change | total_change | years_to_neutral |
|---|---|---|---|---|---|---|---|
Age |
0.4306897 | 0.00898770 | 9.146214e-26 | -2.183438e-06 | -0.0007969547 | -0.01196087 | -540.419290 |
Disability |
0.4978911 | 0.00660351 | 5.424944e-09 | 2.129982e-06 | 0.0007774434 | 0.01166804 | 640.420941 |
Race: White vs Black |
0.2486542 | 0.38318608 | 0.000000e+00 | -1.627011e-05 | -0.0059385897 | -0.08912766 | -41.870921 |
Sexuality |
0.1101791 | 2.51918175 | 0.000000e+00 | -4.601692e-05 | -0.0167961773 | -0.25208071 | -6.559771 |
Skin tone |
0.2741294 | 0.37425945 | 0.000000e+00 | -1.524339e-05 | -0.0055638375 | -0.08350329 | -49.269849 |
Weight |
0.4697231 | 0.41869775 | 0.000000e+00 | 1.585388e-05 | 0.0057866674 | 0.08684757 | 81.173334 |
current_bias <- iat_data %>% filter(year == 2020) %>%
group_by(iat) %>% summarise(bias = mean(att, na.rm = TRUE))
date_lms <- iat_data %>% drop_na(att) %>% group_by(iat) %>%
do(fit_date = lm(att ~ test_date, weight = wt, data = .))
lm_res <- date_lms %>%
left_join(current_bias) %>%
mutate(r2_pct = glance(fit_date)$r.squared*100, p.value = glance(fit_date)$p.value,
coef = tidy(fit_date)$estimate[2],
annual_change = coef*365,
total_change = coef * as.numeric(max(iat_data$test_date) - min(iat_data$test_date)),
years_to_neutral = bias/annual_change) %>%
select(-fit_date)
## Joining, by = "iat"
gt::gt(lm_res, caption = "Linear trends in explicit preferences") %>% gt_apa_style()
| iat | bias | r2_pct | p.value | coef | annual_change | total_change | years_to_neutral |
|---|---|---|---|---|---|---|---|
Age |
0.34770223 | 0.3081378 | 0 | -3.928885e-05 | -0.01434043 | -0.2152243 | -24.246292 |
Disability |
0.29847286 | 0.9829409 | 0 | -5.527930e-05 | -0.02017695 | -0.3028200 | -14.792767 |
Race: White vs Black |
0.03928152 | 1.4285406 | 0 | -7.446847e-05 | -0.02718099 | -0.4079383 | -1.445184 |
Sexuality |
0.05492981 | 2.6950435 | 0 | -1.221972e-04 | -0.04460198 | -0.6693962 | -1.231555 |
Skin tone |
0.07648301 | 1.0577779 | 0 | -5.675779e-05 | -0.02071659 | -0.3109192 | -3.691872 |
Weight |
0.64680825 | 1.2877984 | 0 | -7.394252e-05 | -0.02698902 | -0.4050571 | -23.965608 |
iat_data_all <- iat_data
iat_data %<>% filter(status == "high")
current_bias <- iat_data %>% filter(year == 2020) %>%
group_by(iat) %>% summarise(bias = mean(iat_score))
date_lms <- iat_data %>% group_by(iat) %>%
do(fit_date = lm(iat_score ~ test_date, weight = wt, data = .))
lm_res <- date_lms %>%
left_join(current_bias) %>%
mutate(r2_pct = glance(fit_date)$r.squared*100, p.value = glance(fit_date)$p.value,
coef = tidy(fit_date)$estimate[2],
annual_change = coef*365,
total_change = coef * as.numeric(max(iat_data$test_date) - min(iat_data$test_date)),
years_to_neutral = bias/annual_change) %>%
select(-fit_date)
## Joining, by = "iat"
gt::gt(lm_res, caption = "Linear trends in implicit bias for high-status respondents") %>% gt_apa_style()
| iat | bias | r2_pct | p.value | coef | annual_change | total_change | years_to_neutral |
|---|---|---|---|---|---|---|---|
Age |
0.4305495 | 0.008650003 | 2.234470e-20 | -2.108354e-06 | -0.0007695492 | -0.01154956 | -559.48275 |
Disability |
0.5174018 | 0.033378061 | 1.805641e-32 | 4.684665e-06 | 0.0017099029 | 0.02566260 | 302.59133 |
Race: White vs Black |
0.3008688 | 0.619009525 | 0.000000e+00 | -1.935029e-05 | -0.0070628570 | -0.10600091 | -42.59874 |
Sexuality |
0.2509742 | 1.943404712 | 0.000000e+00 | -3.624278e-05 | -0.0132286141 | -0.19853794 | -18.97207 |
Skin tone |
0.3170483 | 0.514024055 | 0.000000e+00 | -1.712473e-05 | -0.0062505270 | -0.09380928 | -50.72346 |
Weight |
0.4993160 | 0.135321045 | 3.140411e-185 | 9.503982e-06 | 0.0034689535 | 0.05206282 | 143.93852 |
current_bias <- iat_data %>% filter(year == 2020) %>%
group_by(iat) %>% summarise(bias = mean(att, na.rm = TRUE))
date_lms <- iat_data %>% drop_na(att) %>% group_by(iat) %>%
do(fit_date = lm(att ~ test_date, weight = wt, data = .))
lm_res <- date_lms %>%
left_join(current_bias) %>%
mutate(r2_pct = glance(fit_date)$r.squared*100, p.value = glance(fit_date)$p.value,
coef = tidy(fit_date)$estimate[2],
annual_change = coef*365,
total_change = coef * as.numeric(max(iat_data$test_date) - min(iat_data$test_date)),
years_to_neutral = bias/annual_change) %>%
select(-fit_date)
## Joining, by = "iat"
gt::gt(lm_res, caption = "Linear trends in explicit preferences for high-status respondents") %>% gt_apa_style()
| iat | bias | r2_pct | p.value | coef | annual_change | total_change | years_to_neutral |
|---|---|---|---|---|---|---|---|
Age |
0.4669195 | 0.2369158 | 0 | -3.467933e-05 | -0.01265795 | -0.1899734 | -36.887436 |
Disability |
0.3417675 | 0.9759784 | 0 | -5.408770e-05 | -0.01974201 | -0.2962924 | -17.311689 |
Race: White vs Black |
0.2475726 | 2.5033465 | 0 | -7.869742e-05 | -0.02872456 | -0.4311045 | -8.618846 |
Sexuality |
0.4402305 | 2.5853407 | 0 | -1.075858e-04 | -0.03926882 | -0.5893551 | -11.210688 |
Skin tone |
0.1902686 | 1.3107425 | 0 | -5.920642e-05 | -0.02161034 | -0.3243328 | -8.804515 |
Weight |
0.8528131 | 1.1165042 | 0 | -7.419293e-05 | -0.02708042 | -0.4064289 | -31.491873 |
iat_data <- iat_data_all
rm(iat_data_all)
For implicit bias, they can be seen in Table 2.1. If one restricts the analysis to high-status respondents, the results are in Table 2.3.
For explicit bias, see Table 2.2. If one restricts the analysis to high-status respondents, the results are in Table 2.4.
Distributions highlight that the shift since 2006 has been modest.
iat_data %<>% mutate(iat = forcats::fct_reorder(iat, iat_score, .fun = mean))
plot_distributions <- function(data1, data2, status_filter = ".*", title) {
data1 <- data1 %>%
mutate(status = forcats::fct_explicit_na(status)) %>%
filter(stringr::str_detect(status, status_filter))
data2 <- data2 %>%
mutate(status = forcats::fct_explicit_na(status)) %>%
filter(stringr::str_detect(status, status_filter))
share_biased1 <- data1 %>%
group_by(iat) %>%
summarise(
pro_high = sum((iat_score >= .15) * wt) / sum(wt),
pro_low = sum((iat_score <= -.15) * wt) / sum(wt)
)
share_biased2 <- data2 %>%
group_by(iat) %>%
summarise(
pro_high = sum((iat_score >= .15) * wt) / sum(wt),
pro_low = sum((iat_score <= -.15) * wt) / sum(wt)
)
ggplot() +
geom_density_ridges_gradient(data = data1,
mapping = aes(x = iat_score, y = iat, fill = stat(x), height = ..density.., weight = wt),
scale = 0.95,
stat = "density"
) +
scale_fill_distiller(type = "seq", direction = -1, palette = "Greys") +
new_scale_fill() +
geom_density_ridges_gradient(data = data2,
mapping = aes(x = iat_score, y = iat, fill = stat(x), height = ..density.., weight = wt),
scale = 0.95,
stat = "density"
)+
scale_fill_viridis_c(name = "IAT scores", option = "magma", direction = -1, limits = c(-2, 2), alpha = .5) +
labs(title = title, y = "", x = "") +
annotate("rect", xmin = -.15, xmax = .15, ymin = 0, ymax = length(unique(data1$iat)) + 1, fill = "white", alpha = .4) +
geom_text(data = share_biased1, aes(y = iat, x = 1.7, label = fmt_pct(pro_high, 0), fill = NULL), nudge_y = .25, color = "azure4") +
geom_text(data = share_biased1, aes(y = iat, x = -1.7, label = fmt_pct(pro_low, 0), fill = NULL), nudge_y = .25, color = "azure4") +
geom_text(data = share_biased2, aes(y = iat, x = 1.7, label = fmt_pct(pro_high, 0), fill = NULL), nudge_y = .65, color = "darkorchid4") +
geom_text(data = share_biased2, aes(y = iat, x = -1.7, label = fmt_pct(pro_low, 0), fill = NULL), nudge_y = .65, color = "darkorchid4") +
theme(legend.position = "none") +
scale_x_continuous(limits = c(-2, 2))
}
p_all <- plot_distributions(iat_data %>% filter(year == 2006), iat_data %>% filter(year == 2020), title = "All respondents") + theme_ridges(grid = FALSE) + theme(legend.position = "none")
p_high <- plot_distributions(iat_data %>% filter(year == 2006), iat_data %>% filter(year == 2020), "high", title = "High-status respondents") + theme_ridges(grid = FALSE) + theme(legend.position = "none")
p_low <- plot_distributions(iat_data %>% filter(year == 2006), iat_data %>% filter(year == 2020), "low", title = "Low-status respondents") + theme_ridges(grid = FALSE) + theme(legend.position = "none")
(p_all/
p_high/
p_low) + plot_annotation(
title = "Distribution of IAT scores in 2020 (purple) vs 2006 (grey)",
subtitle = "... and share of biased respondents (|D| > .15)",
caption = "NB: Positive scores indicate preference for higher-status group; \n
Status is coded separately for each IAT")